*Project folders stored as globals. The code visible below reads the temp, log and output globals.
global input "Q:\dc1prhcmsas01\PU2\data - sas"
global temp "Q:\dc1prhcmsas01\PU2\temp_stata_wr"
global log "Q:\dc1prhcmsas01\PU2\Log - Stata WR"
global output "Q:\dc1prhcmsas01\PU2\data_stata_wr"
global graphs "Q:\dc1prhcmsas01\PU2\graphs_stata_wr"
global tables "Q:\dc1prhcmsas01\PU2\tables_stata_wr"

*Work from the code folder and set the graph scheme and graph-window font
cd "Q:\dc1prhcmsas01\PU2\Code - Stata WR"
set scheme plotplain
graph set window fontface "Times New Roman"

*Close any log that is still open (no error if none is), then start this file's log, overwriting the previous one
capture log close
log using "$log/10_reg_new_hires", replace

		
*Program to estimate event-study regressions and save the coefficients
*
*Arguments (positional):
*	var    outcome variable
*	name   stem of the output file, saved in the output folder as <name>_estimates.dta
*	start  event-time value that corresponds to the first dummy, d_1
*	end    event-time value that corresponds to the last dummy
*	scale  multiplier applied to the event-time value to fill event_time (callers pass 6 or 1)
*
*Expects in memory: treat, time, client0 and dummies d_1, d_2, ... numbered in order
*from start to end. The dummy for event time -1 is never read: that period is the
*reference and is written out with beta, ub and lb equal to 0.
*
*Two specifications are run on the same data. Basic absorbs treat and time;
*Within-Firm absorbs treat and time#client0. Both cluster on client0.
*The saved file has one row per event time and specification (Within-Firm rows
*first, then Basic rows) with beta, ub and lb (beta +/- 1.96 se), n (e(N)) and
*mean (mean of var over observations with time<0).
*The data in memory are left unchanged (each pass runs inside preserve/restore).
cap program drop reg_est
program define reg_est 
args var name start end scale

	*One pass per fixed-effects specification; the passes differ only in the absorb list and the label
	local pass=0
	foreach fe in "treat time" "treat time#client0" {
		local ++pass
		if `pass'==1 {
			local label "Basic"
		}
		else {
			local label "Within-Firm"
		}
		
		preserve 
		
		reghdfe `var' d_*, absorb(`fe') vce(cluster client0)
		
		*Result columns; they are filled in the first rows of the data, one row per event time
		cap drop beta ub lb
		gen beta=.
		gen ub=.
		gen lb=.
		gen event_time=.
		*long rather than float so that large observation counts are stored exactly
		gen long n=.
		gen spec="`label'"
		gen mean=.
		
		local index=1
		forvalues i=`start'/`end' {
			replace event_time=`i'*`scale' if _n==`index'
			replace n=e(N) if _n==`index'
			if `i'==-1 {
				*Reference period: normalised to zero
				replace beta=0 if _n==`index'
				replace ub=0 if _n==`index'
				replace lb=0 if _n==`index'
			}
			else {
				replace beta=_b[d_`index'] if _n==`index'
				replace ub=_b[d_`index'] + 1.96*_se[d_`index'] if _n==`index'
				replace lb=_b[d_`index'] - 1.96*_se[d_`index'] if _n==`index'
			}
			local ++index
		}
		
		*Pre-period mean of the outcome; r(mean) is read directly so an empty pre-period gives missing instead of a syntax error
		sum `var' if time<0
		replace mean=r(mean)
		
		keep if beta!=.
		keep event_time beta ub lb spec mean n
		
		*Save estimates; the second pass is stacked on top of the first pass's file
		if `pass'>1 {
			append using "$output/`name'_estimates.dta"
		}
		save "$output/`name'_estimates.dta", replace
		restore 
	}
end 
	
*********************************************************
*Baseline file of new hires (one set of hire-time variables, suffix 0)
*********************************************************
	*Load only hires whose base is in [913, 993)
	use if base>=913 & base<993 using "$temp\new_hires_treat", clear
	
	*Hire month, then the hire-time values get a 0 suffix
	rename yr_month month_hired
	rename (client_id state salaried base ot gross) (client0 state0 salaried0 base0 ot0 gross0)
	
	*Treatment indicator: 1 when bin is 0
	gen treat=(bin==0)
	drop bin
	
	save "$temp/hires_baseline", replace 
	
*********************************************************
*Fig 7a, A5 & Table 2: Past
*********************************************************

*For each new hire, identify last observed job
	use "$output\new_hires_panel", clear
	keep if salaried0==1
	*Months between the row's pay month and the hire month (0 = hire month, negative = before the hire)
	gen months_diff=mofd(yr_month) - mofd(month_hired)
	count if months_diff==0 
	
	*How many second jobs? (It's okay to keep them since the wages at a second job is an indication of productivity. "Second job" might also be old job if last day at old job and first day in new job are in the same month.)
		gen second_job=months_diff==0 & client_id!=client0
		tab second_job if months_diff==0 
		
	*Last observed job
		*Rows at or before the hire month at a client other than the hiring client
		gen previous_job=months_diff<=0 & client_id!=client0
		keep if previous_job==1
		*NOTE(review): the max is taken by emp_pur_c only, while the steps below group by
		*emp_pur_c client0 state0 month_hired. If a worker can have more than one hire in
		*the panel, only rows that reach the worker-level max are kept - confirm this is intended.
		egen months_last_job=max(months_diff), by(emp_pur_c)
		keep if months_last_job==months_diff 
		
		*Keep highest salary if worked two jobs at once previously
		egen base_max=max(base), by(emp_pur_c client0 state0 month_hired)
		keep if base_max==base
		*Among rows tied on base, keep the highest gross (compared after rounding to whole numbers)
		egen gross_max=max(gross), by(emp_pur_c client0 state0 month_hired)
		keep if round(gross_max)==round(gross)
		*Any rows still tied are reduced to one per hire (force allows dropping rows that differ in other variables)
		duplicates drop emp_pur_c client0 state0 month_hired, force
		count 
		
	*Log base 
		gen log_base=log(base)
		
	*Change in base pay from last job 
		*Proportional change from the previous job's base to base0
		gen diff_base=(base0-base)/base
		
	*Probability of observing a previous job 
		*observe is 1 when a previous-job row matched a hire in the baseline file, 0 otherwise
		*NOTE(review): rows found only in the master (previous job but no baseline hire) also get observe=0 - confirm none exist or that they are excluded downstream
		merge 1:m emp_pur_c client0 state0 month_hired using "$temp/hires_baseline"
		gen observe=_merge==3 
		drop _merge
		
	*Drop if only temporary layoff 
		*Keeps, for each worker and hiring client, only the rows with the earliest month_hired
		*tag is created here but is not used in the code visible in this file
		duplicates tag emp_pur_c client0, gen(tag)
		egen min_hired=min(month_hired), by(emp_pur_c client0)
		keep if min_hired==month_hired
		
	*Keep only first time someone is hired into a firm 
		bysort emp_pur_c client0 (month_hired): gen n=_n
		keep if n==1
		tab n, sum(observe)
		drop n
		
	*Drop outliers who get crazy changes in base pay 
		*Applies only to hires with an observed previous job; rows with observe==0 are always kept
		keep if (base>100 & base<=10000) | observe==0
		
	*Define event study variables
	*time is the number of months between the hire month and May 2016
	gen time=mofd(month_hired)-mofd(mdy(5,01,2016))
	drop if time==24
	*d_1 to d_6 mark treated hires in six-month bins -2 to 3 (bin = floor(time/6))
	local index=1
	forvalues i=-2/3 {
		gen d_`index'= treat==1 & floor(time/6)==`i'
		label var d_`index' "Treat=1 and Event Time=`i'"
		local ++index
	}
	*d_2 is bin -1, the omitted reference period
	drop d_2 
	drop base_max gross_max previous_job months_last_job second_job
	
	*Saved with the six-month dummies; reused by the balanced-panel section
	save "$temp/past", replace
		
	*Six-month-bin regressions for the figures: outcome k is saved under file stem k
	local yvars base log_base diff_base ot salaried months_diff observe
	local stems fig7_base fig7_log_base figA5_diff_base fig7_ot fig7_salaried fig7_months fig7_observe
	local nspec : word count `yvars'
	
	*Full sample
	forvalues j=1/`nspec' {
		local y : word `j' of `yvars'
		local stem : word `j' of `stems'
		reg_est `y' `stem' -2 3 6
	}
	
	*Same regressions, keeping only previous jobs observed within 6 months of the hire
	preserve 
		keep if months_diff>=-6
		forvalues j=1/`nspec' {
			local y : word `j' of `yvars'
			local stem : word `j' of `stems'
			reg_est `y' `stem'_recent -2 3 6
		}
	restore 
	
	*Monthly version: one treatment dummy per event month from -12 to 23
		drop d_*
		forvalues m=-12/23 {
			*Dummy number runs from 1 (month -12) to 36 (month 23)
			local index=`m'+13
			gen d_`index'= treat==1 & time==`m'
			label var d_`index' "Treat=1 and Event Time=`m'"
		}
		*d_12 is month -1, the omitted reference period
		drop d_12
		
		*Same outcomes as the six-month figures, saved with a _monthly suffix
		local yvars_m base log_base diff_base ot salaried months_diff observe
		local stems_m fig7_base fig7_log_base figA5_diff_base fig7_ot fig7_salaried fig7_months fig7_observe
		local nspec_m : word count `yvars_m'
		forvalues j=1/`nspec_m' {
			local y : word `j' of `yvars_m'
			local stem : word `j' of `stems_m'
			reg_est `y' `stem'_monthly -12 23 1
		}

	*Repeat using 0 for missing (i.e. assume missing means unemployed)
		*The monthly dummies are still in memory at this point, but reg_est is called
		*with six-month arguments (-2 3 6) and reads d_1, d_3, ..., d_6 by number.
		*Rebuild the six-month bin dummies first (same definition as the main
		*specification) so that the saved coefficients match their event-time labels.
		drop d_*
		local index=1
		forvalues i=-2/3 {
			gen d_`index'= treat==1 & floor(time/6)==`i'
			label var d_`index' "Treat=1 and Event Time=`i'"
			local ++index
		}
		*d_2 is bin -1, the omitted reference period
		drop d_2
		
		replace base=0 if base==.
		reg_est base fig7_base_fill_zeros -2 3 6
		
*********************************************************
*Fig 7b & Table 2: Future
*********************************************************

*Pay 18 months after the hire, at the hiring firm
	use "$output\new_hires_panel", clear
	keep if salaried0==1
	gen months_diff=mofd(yr_month) - mofd(month_hired)
	keep if client_id==client0 & months_diff==18
	*One row per hire
	duplicates drop emp_pur_c client0 state0 month_hired, force
	
	*Outcomes: log base and proportional growth relative to base0
	gen log_base=log(base)
	gen wage_growth=(base-base0)/base0 
	
	*Bring in all baseline hires; observe_future is 1 when the hire has a row 18 months later at the same firm
	merge 1:m emp_pur_c client0 state0 month_hired using "$temp/hires_baseline"
	gen observe_future=(_merge==3)
	drop _merge

	*Event time in months relative to May 2016, and six-month bin dummies for treated hires
	gen time=mofd(month_hired)-mofd(mdy(5,01,2016))
	drop if time==24
	forvalues b=-2/3 {
		*Dummy number runs from 1 (bin -2) to 6 (bin 3)
		local index=`b'+3
		gen d_`index'= treat==1 & floor(time/6)==`b'
		label var d_`index' "Treat=1 and Event Time=`b'"
	}
	*d_2 is bin -1, the omitted reference period
	drop d_2
		
	*Regressions for the figures: outcome k is saved under file name k
	local yvars_f base log_base wage_growth ot salaried observe_future
	local names_f fig7b_base_future fig7b_log_base_future fig7b_wage_growth fig7b_ot fig7b_salaried fig7b_observe_future
	local nspec_f : word count `yvars_f'
	forvalues j=1/`nspec_f' {
		local y : word `j' of `yvars_f'
		local fname : word `j' of `names_f'
		reg_est `y' `fname' -2 3 6
	}
	
	*Repeat using zeros for missing wages 
	preserve
		*Use the full variable name. There is no variable called observe in this
		*dataset, so the short form only worked through variable abbreviation; it
		*fails under set varabbrev off or if another variable starting with observe exists.
		replace base=0 if observe_future==0 
		replace wage_growth=(base-base0)/base0 
		reg_est base fig7b_base_future_zeros -2 3 6
		*log_base is not recomputed here (the log of 0 is missing), so this run uses the same log_base values as the main specification
		reg_est log_base fig7b_log_base_future_zeros -2 3 6
		reg_est wage_growth fig7b_wage_growth_zeros -2 3 6
	restore
	
	*Monthly version: one treatment dummy per event month from -12 to 23
		drop d_*
		forvalues m=-12/23 {
			*Dummy number runs from 1 (month -12) to 36 (month 23)
			local index=`m'+13
			gen d_`index'= treat==1 & time==`m'
			label var d_`index' "Treat=1 and Event Time=`m'"
		}
		*d_12 is month -1, the omitted reference period
		drop d_12
	
	*Same outcomes as the six-month figures, saved with a _month suffix
	local yvars_fm base log_base wage_growth ot salaried observe_future
	local stems_fm fig7b_base_future fig7b_log_base_future fig7b_wage_growth fig7b_ot fig7b_salaried fig7b_observe_future
	local nspec_fm : word count `yvars_fm'
	forvalues j=1/`nspec_fm' {
		local y : word `j' of `yvars_fm'
		local stem : word `j' of `stems_fm'
		reg_est `y' `stem'_month -12 23 1
	}
	
*********************************************************
*Balanced panel: hires with both a previous job and pay 18 months later
*********************************************************
	*Future outcomes, restricted to hires observed 18 months later
	keep if observe_future==1
	keep emp_pur_c client0 state0 month_hired base log_base wage_growth
	rename (base log_base) (base_future log_base_future)

	*Attach the saved past sample (it carries the six-month dummies) and keep matched hires with an observed previous job
	merge 1:m emp_pur_c client0 state0 month_hired using "$temp/past"
	keep if _merge==3 & observe==1
	
	*Six-month-bin regressions on the balanced sample, saved with a _balanced suffix
	local yvars_b base log_base diff_base base_future log_base_future wage_growth
	local stems_b fig7_base fig7_log_base figA5_diff_base fig7b_base_future fig7b_log_base_future fig7b_wage_growth
	local nspec_b : word count `yvars_b'
	forvalues j=1/`nspec_b' {
		local y : word `j' of `yvars_b'
		local stem : word `j' of `stems_b'
		reg_est `y' `stem'_balanced -2 3 6
	}
	
log close
